{
 "metadata": {
  "name": "",
  "signature": "sha256:eb8269dea4c92b0f5b69a1eb49c270a89b8ec0d04146a992a577e2752354f7a8"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pandas as pd"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "--- Logging error ---\n",
        "Traceback (most recent call last):\n",
        "  File \"/usr/lib/python3.4/logging/__init__.py\", line 982, in emit\n",
        "    self.flush()\n",
        "  File \"/usr/lib/python3.4/logging/__init__.py\", line 962, in flush\n",
        "    self.stream.flush()\n",
        "OSError: [Errno 5] Input/output error\n",
        "Call stack:\n",
        "  File \"<string>\", line 1, in <module>\n",
        "  File \"/usr/lib/python3.4/site-packages/IPython/kernel/zmq/kernelapp.py\", line 469, in main\n",
        "    app.start()\n",
        "  File \"/usr/lib/python3.4/site-packages/IPython/kernel/zmq/kernelapp.py\", line 459, in start\n",
        "    ioloop.IOLoop.instance().start()\n",
        "  File \"/usr/lib/python3.4/site-packages/zmq/eventloop/ioloop.py\", line 151, in start\n",
        "    super(ZMQIOLoop, self).start()\n",
        "  File \"/usr/lib/python3.4/site-packages/tornado/ioloop.py\", line 837, in start\n",
        "    handler_func(fd_obj, events)\n",
        "  File \"/usr/lib/python3.4/site-packages/tornado/stack_context.py\", line 275, in null_wrapper\n",
        "    return fn(*args, **kwargs)\n",
        "  File \"/usr/lib/python3.4/site-packages/zmq/eventloop/zmqstream.py\", line 433, in _handle_events\n",
        "    self._handle_recv()\n",
        "  File \"/usr/lib/python3.4/site-packages/zmq/eventloop/zmqstream.py\", line 465, in _handle_recv\n",
        "    self._run_callback(callback, msg)\n",
        "  File \"/usr/lib/python3.4/site-packages/zmq/eventloop/zmqstream.py\", line 407, in _run_callback\n",
        "    callback(*args, **kwargs)\n",
        "  File \"/usr/lib/python3.4/site-packages/tornado/stack_context.py\", line 275, in null_wrapper\n",
        "    return fn(*args, **kwargs)\n",
        "  File \"/usr/lib/python3.4/site-packages/IPython/kernel/zmq/ipkernel.py\", line 281, in dispatcher\n",
        "    return self.dispatch_shell(stream, msg)\n",
        "  File \"/usr/lib/python3.4/site-packages/IPython/kernel/zmq/ipkernel.py\", line 224, in dispatch_shell\n",
        "    self.log.debug('\\n*** MESSAGE TYPE:%s***', msg_type)\n",
        "Message: '\\n*** MESSAGE TYPE:%s***'\n",
        "Arguments: ('execute_request',)\n",
        "--- Logging error ---\n",
        "Traceback (most recent call last):\n",
        "  File \"/usr/lib/python3.4/logging/__init__.py\", line 982, in emit\n",
        "    self.flush()\n",
        "  File \"/usr/lib/python3.4/logging/__init__.py\", line 962, in flush\n",
        "    self.stream.flush()\n",
        "OSError: [Errno 5] Input/output error\n",
        "Call stack:\n",
        "  File \"<string>\", line 1, in <module>\n",
        "  File \"/usr/lib/python3.4/site-packages/IPython/kernel/zmq/kernelapp.py\", line 469, in main\n",
        "    app.start()\n",
        "  File \"/usr/lib/python3.4/site-packages/IPython/kernel/zmq/kernelapp.py\", line 459, in start\n",
        "    ioloop.IOLoop.instance().start()\n",
        "  File \"/usr/lib/python3.4/site-packages/zmq/eventloop/ioloop.py\", line 151, in start\n",
        "    super(ZMQIOLoop, self).start()\n",
        "  File \"/usr/lib/python3.4/site-packages/tornado/ioloop.py\", line 837, in start\n",
        "    handler_func(fd_obj, events)\n",
        "  File \"/usr/lib/python3.4/site-packages/tornado/stack_context.py\", line 275, in null_wrapper\n",
        "    return fn(*args, **kwargs)\n",
        "  File \"/usr/lib/python3.4/site-packages/zmq/eventloop/zmqstream.py\", line 433, in _handle_events\n",
        "    self._handle_recv()\n",
        "  File \"/usr/lib/python3.4/site-packages/zmq/eventloop/zmqstream.py\", line 465, in _handle_recv\n",
        "    self._run_callback(callback, msg)\n",
        "  File \"/usr/lib/python3.4/site-packages/zmq/eventloop/zmqstream.py\", line 407, in _run_callback\n",
        "    callback(*args, **kwargs)\n",
        "  File \"/usr/lib/python3.4/site-packages/tornado/stack_context.py\", line 275, in null_wrapper\n",
        "    return fn(*args, **kwargs)\n",
        "  File \"/usr/lib/python3.4/site-packages/IPython/kernel/zmq/ipkernel.py\", line 281, in dispatcher\n",
        "    return self.dispatch_shell(stream, msg)\n",
        "  File \"/usr/lib/python3.4/site-packages/IPython/kernel/zmq/ipkernel.py\", line 225, in dispatch_shell\n",
        "    self.log.debug('   Content: %s\\n   --->\\n   ', msg['content'])\n",
        "Message: '   Content: %s\\n   --->\\n   '\n",
        "Arguments: {'store_history': True, 'user_variables': [], 'allow_stdin': True, 'code': 'import pandas as pd', 'user_expressions': {}, 'silent': False}\n"
       ]
      }
     ],
     "prompt_number": 1
    },
    {
     "cell_type": "heading",
     "level": 5,
     "metadata": {},
     "source": [
      "Get the stocks and names of the S&P 500 companies"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# constituents.csv was produced by a third-party script that scrapes spindices.\n",
      "# read_csv(index_col=0, parse_dates=True) reproduces the defaults of the removed\n",
      "# DataFrame.from_csv helper.\n",
      "sp500_companies = pd.read_csv(\"constituents.csv\", index_col=0, parse_dates=True)\n",
      "\n",
      "# Strip corporate-title noise (Co., Inc, periods, double spaces, trailing Co/Corp).\n",
      "# regex=True is explicit because pandas >= 2.0 treats the pattern literally by default.\n",
      "COMPANY_TITLE_PATTERN = r'Co\\.|Inc|\\.|\\s\\s|\\sCo$|\\sCorp$'\n",
      "sp500_companies['Name'] = sp500_companies['Name'].str.replace(COMPANY_TITLE_PATTERN, '', regex=True)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Plain Python list holding the cleaned company names.\n",
      "company_names = sp500_companies['Name'].tolist()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 4
    },
    {
     "cell_type": "heading",
     "level": 6,
     "metadata": {},
     "source": [
      "Twitter streaming subroutines"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "\n",
      "import tweepy\n",
      "from tweepy import StreamListener\n",
      "from twitterSentiment import models\n",
      "from datetime import datetime\n",
      "from pytz import timezone\n",
      "import json, time, sys\n",
      "\n",
      "tweetsmax = 10\n",
      "tweetscount = 0\n",
      "\n",
      "def timeupdate(twitterdate):\n",
      "    \"\"\"Convert a Twitter ``created_at`` string into a timezone-aware datetime.\n",
      "\n",
      "    The input carries an explicit ``+0000`` offset, e.g.\n",
      "    ``Tue Jul 02 14:33:59 +0000 2013``. The parsed value is therefore tagged\n",
      "    as UTC first and only then converted to US/Central, so the returned\n",
      "    datetime denotes the same instant (``2013-07-02 09:33:59-05:00`` for the\n",
      "    example above).\n",
      "\n",
      "    Raises ValueError when the string does not match the expected format.\n",
      "    \"\"\"\n",
      "    naive_utc = datetime.strptime(twitterdate, '%a %b %d %H:%M:%S +0000 %Y')\n",
      "    central = timezone('US/Central')\n",
      "    # Localizing the naive value directly as US/Central would mislabel the UTC\n",
      "    # wall-clock time and shift every tweet by the UTC offset.\n",
      "    return timezone('UTC').localize(naive_utc).astimezone(central)\n",
      "    \n",
      "class TwitterListener(StreamListener):\n",
      "    \"\"\"Stream listener that stores tweets whose author language is 'en'.\n",
      "\n",
      "    Each accepted tweet is handed to TwitterDatabase. Processing stops after\n",
      "    ``tweetsmax`` messages; the running tally is the module-level ``tweetscount``.\n",
      "\n",
      "    Ideas from\n",
      "    http://digitalfoo.net/posts/using-python-and-tweepy-to-scrape-streaming-tweets-into-mongodb/\n",
      "    and\n",
      "    http://nbviewer.ipython.org/github/alexhanna/hse-twitter/blob/master/docs/Collecting%20Twitter%20data%20from%20the%20API%20with%20Python.ipynb\n",
      "    \"\"\"\n",
      "\n",
      "    def __init__(self, api = None, fprefix = 'streamer'):\n",
      "        # fprefix is accepted for backward compatibility but is not used.\n",
      "        # The fallback must be module-qualified: only `tweepy` itself is imported,\n",
      "        # so a bare API() raised NameError whenever no client was supplied.\n",
      "        self.api = api or tweepy.API()\n",
      "        self.counter = 0\n",
      "\n",
      "    def on_data(self, data):\n",
      "        \"\"\"Handle one raw stream message (a JSON string).\n",
      "\n",
      "        Returns False once ``tweetsmax`` messages have been seen (tweepy stops\n",
      "        the stream on False -- see the tweepy StreamListener documentation)\n",
      "        and None otherwise.\n",
      "        \"\"\"\n",
      "        global tweetsmax, tweetscount\n",
      "        print (tweetsmax)\n",
      "        print (tweetscount)\n",
      "        if (tweetsmax == tweetscount):\n",
      "            tweetscount=0  # reset so the next streaming run starts from zero\n",
      "            return False\n",
      "        tweetscount = tweetscount+1\n",
      "        try:\n",
      "            tweet = json.loads(data) #convert twitter stream in json into Python dictionary\n",
      "            if isinstance(tweet, dict):\n",
      "                if tweet['user']['lang'] != 'en':\n",
      "                    return\n",
      "                print (\"tweet: \", tweet['text'])\n",
      "                TwitterDatabase(tweet)\n",
      "        except Exception:\n",
      "            # Best effort: report and keep streaming. Exception (not a bare except)\n",
      "            # lets KeyboardInterrupt through so the kernel can still be interrupted.\n",
      "            print (\"Error in Twitter listener. Error message:\", sys.exc_info())\n",
      "        return\n",
      "\n",
      "    def on_limit(self, track):\n",
      "        \"\"\"Report a limit notice on stdout; returns None.\"\"\"\n",
      "        print(\">> limit\")\n",
      "        return \n",
      "\n",
      "    def on_error(self, status_code):\n",
      "        \"\"\"Report an error status code on stdout; returns None.\"\"\"\n",
      "        print(\">>> error: \", str(status_code) + \"\\n\")\n",
      "        return \n",
      "\n",
      "    def on_timeout(self):\n",
      "        \"\"\"Report a timeout, then sleep for 60 seconds; returns None.\"\"\"\n",
      "        print(\">>> timeout Sleeping for 60 seconds...\\n\")\n",
      "        time.sleep(60)\n",
      "        return \n",
      "\n",
      "  \n",
      "def TwitterDatabase(tweet):\n",
      "    \"\"\"Insert one tweet (a dict decoded from the stream JSON) as a models.TwitterText row.\n",
      "\n",
      "    Reads the module-level ``keywords`` that TwitterStreaming sets. Failures\n",
      "    are reported on stdout and otherwise ignored (best effort); returns None.\n",
      "    \"\"\"\n",
      "    try:\n",
      "        aTweet = models.TwitterText(twitter_user_id=tweet['user']['id'], \n",
      "                                        twitter_user_name=tweet['user']['screen_name'],\n",
      "                                        twitter_text=tweet['text'], \n",
      "                                        twitter_text_id=tweet['id'], \n",
      "                                        twitter_text_timestamp=timeupdate(tweet['created_at']), \n",
      "                                        twitter_text_keyword=keywords)\n",
      "        aTweet.save()\n",
      "    except Exception:\n",
      "        # Exception rather than a bare except, so KeyboardInterrupt and\n",
      "        # SystemExit still propagate while ordinary failures stay best effort.\n",
      "        print (\"Error in Django insert tweet. error message:\", sys.exc_info())\n",
      "    return\n",
      "        \n",
      "    \n",
      "def TwitterStreaming(company_names):\n",
      "    \"\"\"Stream tweets matching ``company_names`` and hand them to TwitterListener.\n",
      "\n",
      "    company_names is passed unchanged as the stream filter's ``track`` argument.\n",
      "\n",
      "    OAuth credentials are read from the environment variables\n",
      "    TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET, TWITTER_ACCESS_TOKEN and\n",
      "    TWITTER_ACCESS_TOKEN_SECRET. They used to be hard-coded in this cell; any\n",
      "    keys that were committed should be treated as leaked and revoked.\n",
      "\n",
      "    Errors are reported on stdout; the function always returns True.\n",
      "    \"\"\"\n",
      "    import os  # local import keeps this cell self-contained\n",
      "\n",
      "    global keywords\n",
      "    # NOTE(review): this wraps the incoming list in another list before it is\n",
      "    # stored with each tweet -- confirm that is what the model field expects.\n",
      "    keywords = [company_names]\n",
      "    try:\n",
      "        consumer_key        = os.environ[\"TWITTER_CONSUMER_KEY\"]\n",
      "        consumer_secret     = os.environ[\"TWITTER_CONSUMER_SECRET\"]\n",
      "        access_token        = os.environ[\"TWITTER_ACCESS_TOKEN\"]\n",
      "        access_token_secret = os.environ[\"TWITTER_ACCESS_TOKEN_SECRET\"]\n",
      "    except KeyError as missing:\n",
      "        print (\"Error in Twitter streaming: missing environment variable\", missing)\n",
      "        return True\n",
      "    try:\n",
      "        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)\n",
      "        auth.set_access_token(access_token, access_token_secret)\n",
      "        api = tweepy.API(auth)\n",
      "\n",
      "        listener = TwitterListener(api, \"test\")\n",
      "        print (\"Begin Twitter streaming for \", keywords)\n",
      "        stream = tweepy.Stream(auth, listener)\n",
      "        print (keywords)\n",
      "        stream.filter(track=company_names, follow=\"\")\n",
      "    except Exception:\n",
      "        # Exception rather than a bare except, so interrupting the kernel still works.\n",
      "        print (\"Error in Twitter streaming\",sys.exc_info())\n",
      "    return True"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 5
    },
    {
     "cell_type": "heading",
     "level": 6,
     "metadata": {},
     "source": [
      "Pick 10 companies to build the Twitter search keywords"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import random  # never imported elsewhere in this notebook; needed on a fresh kernel\n",
      "\n",
      "company_keyword_count = 10\n",
      "# random.sample draws distinct names; the size is capped so a short list cannot raise ValueError.\n",
      "company_random = random.sample(company_names, min(company_keyword_count, len(company_names)))\n",
      "companies_preview = \",\".join(\"\\\"\"+str(companies.rstrip())+\"\\\"\" for companies in company_random)\n",
      "print (companies_preview)\n",
      "\n",
      "# The random selection above is only printed; streaming tracks this fixed keyword list.\n",
      "companieslist = ['Procter & Gamble']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\"Fidelity National Information Services\",\"Costco\",\"Huntington Bancshares\",\"KeyCorp\",\"Coach\",\"Teradata Corp\",\"American Tower Corp A\",\"Emerson Electric\",\"Prudential Financial\",\"Macy's\"\n"
       ]
      }
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Start streaming for the keyword list chosen above.\n",
      "TwitterStreaming(company_names=companieslist)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}